## # A tibble: 5 x 6
## state total_vote r_points pct_trump party census
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 Wyoming 255849 46.3 68.2 Republican West
## 2 Rhode Island 464144 -15.5 38.9 Democrat Northeast
## 3 Pennsylvania 6166710 0.71 48.2 Republican Northeast
## 4 Vermont 315067 -26.4 30.3 Democrat Northeast
## 5 Oregon 2001336 -11.0 39.1 Democrat West
The FIPS code is a federal code that numbers states and territories of the United States. It extends to the county level with an additional four digits, so every U.S county has a unique 6 digit identifier, where the first two digits represent the state.
The first thing you should keep in mind is that spatial data does not have to be represented spatially.
Figure 7.2: 2016 election results using a dotplot
party_colors <- c("#2E74C0","#CB454A")
p0 <- ggplot(data = subset(election, st %nin% "DC"),
mapping = aes(x = r_points,
y = reorder(state, r_points),
color = party))
p1 <- p0 + geom_vline(xintercept = 0, color = "gray30") +
geom_point(size = 2)
p2 <- p1 + scale_color_manual(values = party_colors)
p3 <- p2 + scale_x_continuous(
breaks = seq(-30,40,by = 10),
labels = c("30\nClinton","20",10,0,
"10","20","30","40\n(Trump)"))
p3 + facet_wrap(~census, ncol = 1, scales = "free_y") +
guides(color = FALSE) + labs(x = "Point Margin", y = "") +
theme(axis.text = element_text(size =8))
##maps package provides coordinates for some predrawn map data
library(maps)
us_states <- map_data("state")
head(us_states)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
Figure 7.3: A first US map
p <- ggplot(data = us_states,
mapping = aes(x = long,
y = lat,
group = group))
p + geom_polygon(fill = "white",
color = "black")
Figure 7.4: Coloring the states
p <- ggplot(data = us_states,
aes(x = long,
y = lat,
group = group,
fill = region))
p + geom_polygon(color = "gray90",
size =0.1) +
guides(fill = FALSE)
We can transform the default projection used by geom_polygon() via the coord_map() function.
The Albers projection requires two additional parameters, lat0 and lat1, passed to the geom_polygon function.
Figure 7.5: Improving the projection using an Albers projection
p <- ggplot(data =us_states,
mapping = aes(x = long,
y = lat,
group = group,
fill = region))
p + geom_polygon(color = "gray90",
size =0.1) +
coord_map(projection = "albers",lat0= 39,lat1 = 45) +
guides(fill = FALSE)
Figure 7.6: Mapping the results
##Merging the coordinates with our election data
election$region <- tolower(election$state)
us_states_elec <- left_join(us_states, election)
p <- ggplot(data = us_states_elec,
aes(x = long,
y = lat,
group = group,
fill = party))
p + geom_polygon(color = "gray90",size =0.1) +
coord_map(projection = "albers",
lat0 = 39,
lat1 = 45) +
theme(legend.position = "top",
legend.text = element_text(size = 8),
legend.title = element_text(size =10)) +
guides(fill = guide_legend(title = "Party", title.position = "top"))
Figure 7.7: Election 2016 by state
party_colors <- c("#2E74C0","#CB454A")
p0 <- ggplot(data = us_states_elec,
mapping = aes(x = long,
y = lat,
group = group,
fill = party))
p1 <- p0 + geom_polygon(color = "gray90",
size =0.1) +
coord_map(projection = "albers",
lat0 = 39,
lat1 = 45)
p2 <- p1 + scale_fill_manual(values = party_colors) +
labs(title = "Election Results 2016", fill = NULL)
p2 +
theme_minimal() +
theme(
legend.position = c(0.1,0),##Use numbers to justify the legend
legend.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank()
)
Figure 7.8: Two versions of percent Trump by state
p0 <- ggplot(data = us_states_elec,
mapping = aes(x = long,
y = lat,
group = group,
fill = pct_trump))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
##First graph
p1 + labs(title = "Trump vote") +
theme_minimal() +
theme(
##Use numbers to justify the legend
legend.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank()
) +
labs(fill = "Percent")
##Second graph
p2 <- p1 + scale_fill_gradient(low = "white",
high = "#CB454A") +
labs(title = "Trump vote")
p2 + theme_minimal() +
theme(
##Use numbers to justify the legend
legend.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank()
) + labs(fill = "Percent")
Figure 7.9: Changing midpoint of scale_gradient2
##Plot 1
p0 <- ggplot(data = us_states_elec,
mapping = aes(x = long,
y = lat,
group = group,
fill = d_points))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
p2 <- p1 + scale_fill_gradient2() + labs(title = "Winning margins")
p2 + theme_minimal() +
theme(
##Use numbers to justify the legend
legend.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank()
) + labs(fill = "Percent")
##Plot 2
p3 <- p1 + scale_fill_gradient2(low = "red",
mid =scales::muted("purple"),
high = "blue",
breaks = c(-25,0,25,75)) +
labs(title = "Winning margins")
p3 + theme_minimal() +
theme(
##Use numbers to justify the legend
legend.text = element_text(face = "bold"),
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank()
) + labs(fill = "Percent")
#Inspecting county data
county_map %>% sample_n(5) %>% round_df(2)
## long lat order hole piece group id
## 1 1432700.4 -1100515.1 42186 FALSE 1 0500000US13121.1 13121
## 2 -436014.9 -550267.6 30728 FALSE 1 0500000US08014.1 08014
## 3 -524074.7 -605648.1 33120 FALSE 1 0500000US08117.1 08117
## 4 2041784.8 -479076.2 82177 FALSE 1 0500000US24037.1 24037
## 5 1704756.7 -464603.2 180441 FALSE 1 0500000US54001.1 54001
county_data %>%
select(id, name, state, pop_dens, pct_black) %>%
sample_n(5)
## id name state pop_dens pct_black
## 2778 48419 Shelby County TX [ 10, 50) [15.0,25.0)
## 1428 28001 Adams County MS [ 50, 100) [50.0,85.3]
## 309 08113 San Miguel County CO [ 0, 10) [ 0.0, 2.0)
## 2102 39043 Erie County OH [ 100, 500) [ 5.0,10.0)
## 911 20009 Barton County KS [ 10, 50) [ 0.0, 2.0)
county_full <- left_join(county_map, county_data, by = "id")
Figure 7.11: U.S Population density by county
Figure 7.12: Percent black population by county
p <- ggplot(data = county_full,
mapping = aes(x = long,
y = lat,
fill = pct_black,
group = group))
p1 <- p + geom_polygon(color = "black", size = 0.05) + coord_equal()
p2 <- p1 + scale_fill_brewer(palette = "Greens")
p2 + labs(fill = "US Population, Percent Black") +
guides(fill = guide_legend(nrow = 1)) +
theme_minimal() +
theme(
##Use numbers to justify the legend
legend.text = element_text(face = "bold",size = 8),
legend.title = element_text(size = 10),
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank()
) + theme(legend.position = "bottom")
## [1] "#FEEDDE" "#FDD0A2" "#FDAE6B" "#FD8D3C" "#E6550D" "#A63603"
## [1] "#A63603" "#E6550D" "#FD8D3C" "#FDAE6B" "#FDD0A2" "#FEEDDE"
Figure 7.13.1: Gun related suicides by county
Figure 7.13.2: Population density by county
Normally, we standardize frequency to control for the fact that larger populations have higher numbers just because they have more people in them.
However, this sort of standardization has its limits. In particular, when the event of interest is not very common, and there is wide variation in the base size of the unites, then the denominator starts to be epressed more and ore in the standardized measure.
Data is subject to reporting constraints connected to population size.
Small differences in reporting, combined with coarse binning and miscoding, will produce spatially misleading and substantively mistaken results.
As an alternative to state-level choropleths, it is possible to consider state-bins.
Statebins needs, as arguments, the basic data frame (state_data argument), a vector of state names(state_col), and the value being shown (value_col). In addition, we can optionally tell it the color palette we want and the color of the text to label the state boxes.
Figure 7.14.1: Statebins of election results (percent Trump)
library(statebins)
statebins_continuous(state_data = election,
state_col = "state",
text_color = "white",
value_col = "pct_trump",
brewer_pal = "Reds",
font_size = 3,
legend_title = "Percent Trump")
Figure 7.14.2: Statebins of election results (percent Clinton)
statebins_continuous(state_data = subset(election, !st%in% "DC"),
state_col = "state",
text_color = "black",
value_col = "pct_clinton",
brewer_pal = "Blues",
font_size = 3,
legend_title = "Percent Clinton")
Figure 7.15.1: Manually specifying colors for statebins
##We specify the color scheme as a variable in the data frame instead of using a mapping to
## the ggplot() call
election <- election %>%
mutate(color = recode(party, Republican = "darkred",
Democrat = "royalblue"))
statebins_manual(state_data = election,
state_col = "st",
color_col = "color",
text_color = "white",
font_size = 3,
legend_title = "Winner",
labels = c("Trump", "Clinton"),
legend_position = "right")
Figure 7.15.2: Manually specifying colors for statebins
head(opiates) %>%
as_tibble() %>%
round_df(2)
## # A tibble: 6 x 11
## year state fips deaths population crude adjusted adjusted_se region
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <ord>
## 1 1999 Alab… 1 37 4430141 0.8 0.8 0.1 South
## 2 1999 Alas… 2 27 624779 4.3 4 0.8 West
## 3 1999 Ariz… 4 229 5023823 4.6 4.7 0.3 West
## 4 1999 Arka… 5 28 2651860 1.1 1.1 0.2 South
## 5 1999 Cali… 6 1474 33499204 4.4 4.5 0.1 West
## 6 1999 Colo… 8 164 4226018 3.9 3.7 0.3 West
## # ... with 2 more variables: abbr <chr>, division_name <chr>
us_states <-as_tibble(us_states)
opiates <- as_tibble(opiates) %>%
mutate(region_1 = tolower(state))
opiates_map <- left_join(us_states,opiates,by = c("region" = "region_1"))
Because the opiates data includes the year variable, it is possible to make a faceted small-multiple with one map for each year in the data.
The viridis colors run in low-to-high sequences and do a very good job of combining perceptually uniform colors with easy-to-see, easily contrasted hues along their scales.
Figure 7.16: A small-multiple map
library(viridis)
p0 <- ggplot(data = subset(opiates_map, year > 1999),
mapping = aes(x = long,
y = lat,
group = group,
fill = adjusted))
p1 <- p0 + geom_polygon(color = "gray90",size = 0.05) +
coord_map(projection = "albers",lat0 =39, lat1 = 45)
p2 <- p1 + scale_fill_viridis_c(option = "plasma")
p2 + theme_minimal() +
theme(
##Use numbers to justify the legend
legend.text = element_text(face = "bold",size = 8),
legend.title = element_text(size = 10),
legend.position = "bottom",
axis.title = element_blank(),
axis.text = element_blank(),
panel.grid = element_blank(),
strip.background = element_blank()) +
facet_wrap(~year, ncol = 3) +
labs(fill = "Death rate per 100,000 population",
title = "Opiate related deaths by state, 2000-2014")
Even if the data is collected via or grouped into spatial unites, it is always worth asking whether a map is the best way to represent it.
Much county, state, and national data is not properly spatial, insofar as it is really about individuals rather than the geographical distribution of those units per se.
Figure 7.17: Time-series plots of opiates in all states
p <- ggplot(data = opiates,
mapping = aes(x = year, y = adjusted,
group = state))
p + geom_line(color = "gray70")
Figure 7.18: Opiated data as a faceted time series
#Dropping rows that have missing observations on the specified variables
p0 <- ggplot(data = drop_na(opiates,division_name),
mapping = aes(x = year,
y = adjusted))
p1 <- p0 + geom_line(color = "gray70",
mapping = aes(group = state))
p2 <- p1 + geom_smooth(mapping = aes(group = division_name),
se = FALSE)
##Putting the label for each state in the end of the series using ggrepel
##Call to coord_cartesian serve to set the axis limits
p3 <- p2 +
geom_text_repel(data = subset(opiates,
year == max(year) & abbr !="DC"),
mapping = aes(x = year, y = adjusted,
label = abbr),
size = 1.8,segment.color = NA, nudge_x = 30) +
coord_cartesian(c(min(opiates$year),
max(opiates$year)))
p3 <- labs(x = "",
y = "Rate per 100,000 population",
title = "State-Level Opiate Death Rates by Census Division, 1999-2014") +
facet_wrap(~reorder(division_name, -adjusted, na.rm = TRUE),
nrow = 3)